missing <- function(x){
  x[is.na(x)] <- mean(x, na.rm = TRUE)
  x
}

z <- function(x){
  if(sd(x) == 0) return(x)
  scale(x)
}

# Iv regression with Moran eigenvectors
iv.reg <- function(y = NULL, Model, data, weights = data$w, Moran = TRUE, scale = TRUE){
  library(lfe)
  library(Formula)
  library(spdep)
  library(splines)
  Fit.moran <- NULL
  if(is.null(weights)) weights <- rep(1, nrow(data))
  data$w <- weights
  if(!is.null(y)) data$y <- data@data[, y]
  if(!scale) Model <- update(Model, y ~ .)
  iv <- ivm <- felm(Model, data = data, weights = data$w, keepCX=TRUE, bootcluster = "model")
  ndata <- subset(data, !rownames(data@data)%in%names(iv$na.action))
  if (Moran){
    Fit.moran <- SpatialFiltering(iv$cY ~ iv$cX, nb = poly2nb(ndata), style = "W", ExactEV = TRUE, zero.policy = TRUE, alpha = 0.05)
    ndata$Moran <- fitted(Fit.moran)
    ivm <- felm(update(Model, . ~ . + Moran), data = ndata, weights = ndata$w, keepCX=TRUE)
  }
  e <- residuals(ivm)
  if(!is.null(ivm$stage1)) e <- ivm$iv.residuals
  if(sd(e) > 0) p <- moran.test(e*ndata$w, listw = nb2listw(poly2nb(ndata)), zero.policy = TRUE, randomisation = TRUE)$p.value
  if(sd(e) == 0) p <- NA
  list(iv = iv, ivm = ivm, p = p, y = y, Fit.moran = Fit.moran, Model = Model)
}


# Wrapper
Iv.reg <- function(y = NULL, Model, data, ...){
  
  
  if(!is.list(Model)) Model <- list(Model)
  
  parallel <- formals(iv.reg)$Moran
  
  if(parallel){
    cl  <- makeCluster(detectCores() - 1)
    clusterExport(cl, c("iv.reg", ls()), envir=environment())
    out <- parLapply(cl, Model, function(z) lapply(y, iv.reg, Model = z, data = data, ...))
    stopCluster(cl)
    return(out)
  }
  
  if(!parallel){
    out <- lapply(Model, function(z) lapply(y, iv.reg, Model = z, data = data, ...))
    return(out)
  }
}


# Functions for latex output
reg_latex <- function(x, var = NULL){
    f <- function(x){
      if(is.null(var)) var <- names(coefficients(x))
      w <- match(var, names(coefficients(x)))
      z <- matrix(summary(x, robust = TRUE)$coefficients[w, ], nrow = length(var))
      p <- c("{**}", "{*}", "{\\dagger}", "{}")[cut(z[,4], c(-1, 0.01, 0.05, .1, 1), labels=1:4)]
      rbind(cbind(var, paste(roundr(z[,1], 2), "~(", roundr(z[,2], 2), ")^", p, sep = "")), c("Observations", paste("\\multicolumn{1}{c}{", x$N, "}")))
    }
    lapply(x, f)
}

temp <- function(x, var = "famine"){
  f <- function(x){
    w <- grep(var, names(coefficients(x$ivm)))
    z <- do.call("cbind", x$ivm[c("coefficients", "cse", "cpval")])[w,]
    p <- c("{**}", "{*}", "{\\dagger}", "{}")[cut(z[3], c(-1, 0.01, 0.05, .1, 1), labels=1:4)]
    f <- x$ivm$stage1$rob.iv1fstat[[1]][5]
    # Adjust F-statistic if weather index is used:
    if(is.null(f)) { f <- 0 
    } else if (any(grepl("W_BMA_averaged", x$ivm$stage1$instruments))) f <- f/Q 
    c(x$y, paste(roundr(z[1], 2), "~(", roundr(z[2], 2), ")^", p, sep = ""), roundr(f, 2), roundr(x$p, 2))
  }
  do.call("rbind", lapply(x, f))
}

out.table <- function(tab, labels = labels, title, label = "", caption = "", note = NULL, col.names = c("IV coefficient (S.E.)" , "First-stage F-statistic", "Moran's I (p-value)"), add.row = NULL, row.names = labels, ...){
  
  tab[,1] <- row.names
  z <- apply(tab[,-1], 2, function(x) max(sapply(x, function(x) nchar(gsub("\\^|\\}|\\{|\\)|\\(|\\.|\\*|*", "", regmatches(x, regexpr("\\.", x), invert = TRUE)[[1]][2])))))
  z <- apply(tab[,-1], 2, function(x) max(sapply(x, function(x) nchar(gsub("[^0-9]", "", regmatches(x, regexpr("\\.", x), invert = TRUE)[[1]][2])))))
  d <- paste("d{", rep(2, 3), ".", z, "}", sep = "") 
  L <- 10/length(z)
  g <- print(xtable(tab,
               caption=caption, label = label,
               align = c("l", "l", d)),
        sanitize.text.function = identity,
        hline.after = NULL,
        include.rownames = FALSE,
        include.colnames = FALSE,
        caption.placement = 'bottom', 
        add.to.row = list(
          pos = as.list(c(-1, 0, 0, nrow(tab), add.row$pos)),
          command = c("\\toprule\n", paste("\\multicolumn{1}{l}{Dependent variable} &", paste( paste("\\multicolumn{1}{C{", L, "cm}}{", col.names, "}"), collapse = " & "), " \\\\\n") , "\\midrule\n", "\\bottomrule\n", add.row$command)),
        file = paste("~/Dropbox/Holodomor/latex/tables", title, sep = "/"), ...)
  cat(gsub("\\end{tabular}\n", paste("\\end{tabular}\n\\caption*{{\\footnotesize ", paste(note, " Significance levels (two-tailed): $^{\\dagger} p < 0.1$; $^{*} p < 0.05$; $^{**} p < 0.01$.\n}", sep = ""), "}\n", sep = ""), g, fixed = TRUE), sep = "\n", file =  paste("~/Dropbox/Holodomor/latex/tables", title, sep = "/"))
}


# Functionality to add proper endnotes to tables using \caption*
my.print.xtable <- function(x, ... , end.note = NULL, file = ""){
  g <- print(x, file = "")
  cat(gsub("\\end{tabular}\n", paste("\\end{tabular}\n\\caption*{{\\footnotesize ", end.note, "}}\n", sep = ""), g, fixed = TRUE), sep = "\n", file = file)
}

# Render figure in tikz
# Render figure in tikz
tikZ <- function(graph, title = NULL, path = NULL, ...){
  library(tikzDevice)
  if(is.null(path)) path <- getwd()
  if(is.null(title)) title <- "plot"
  ptitle <- paste(title, "tex", sep = ".")
  init <- getwd()
  setwd(path)
  files  <-  list.files()
  options(tikzMetricPackages = c("\\usepackage[utf8]{inputenc}", "\\usepackage[T1]{fontenc}", "\\usetikzlibrary{calc}", "\\usepackage{amssymb}", "\\usepackage{upgreek}", "\\usepackage{bm}", "\\usepackage{bbm}"))
  tikz(ptitle, standAlone = TRUE,packages = c("\\usepackage{tikz}", "\\usepackage[active,tightpage,psfixbb]{preview}", "\\PreviewEnvironment{pgfpicture}", "\\setlength\\PreviewBorder{0pt}", "\\usepackage{amssymb}", "\\usepackage{upgreek}", "\\usepackage{bm}", "\\usepackage{bbm}"), ...)
  print(graph)
  dev.off()
  tools::texi2pdf(ptitle)
  
  # Clean up
  files <- setdiff(list.files(), files)
  if(length(files) == 0) files <- grep(title, list.files(), value = TRUE)
  files <- files[!grepl("\\.pdf", files)]
  sapply(1:length(files), function(x) if (file.exists(files[x])) file.remove(files[x]))
  
  # Go back to the original directory
  setwd(init)
}

# Clean rounding
roundr <- function(x, n = 2) {
  f <- function(x, n) sprintf(paste("%.", n, "f", sep=""), round(x, n))
  if (is.null(dim(x))) out <- f(x, n)
  if (!is.null(dim(x))) out <- apply(x, 2, f, n = n)
  out
}

#### strsplit and retain the n'th element

strsplitm <- function(x, w, n) unlist(lapply(strsplit(x, w), function(x) x[n]))
strsplitmn <- function(x, w, n) ifelse(grepl(
  w, x), strsplitm(x, w, n), "")




#########################################
# Summary stats
#########################################

sumstat <- function(X,varz,labz=NULL,median=FALSE,meancount=FALSE,rnd=3){
  sumtab <- data.frame(
    Variable=varz,
    Range=paste0("(",apply(X[,varz],2,function(x){round(min(as.numeric(as.character(x)),na.rm=T),rnd)}),",",apply(X[,varz],2,function(x){round(max(as.numeric(as.character(x)),na.rm=T),rnd)}),")"),
    Mean=apply(X[,varz],2,function(x){round(mean(as.numeric(as.character(x)),na.rm=T),rnd)}),
    SD=apply(X[,varz],2,function(x){round(sd(as.numeric(as.character(x)),na.rm=T),rnd)}),
    Missing=round(apply(X[,varz],2,function(x){mean(is.na(as.numeric(as.character(x))),na.rm=T)})*100,0),
    N=nrow(X)
  )
  if(median){
    sumtab <- data.frame(
      Variable=varz,
      Range=paste0("(",apply(X[,varz],2,function(x){round(min(as.numeric(as.character(x)),na.rm=T),rnd)}),",",apply(X[,varz],2,function(x){round(max(as.numeric(as.character(x)),na.rm=T),rnd)}),")"),
      Median=apply(X[,varz],2,function(x){round(median(as.numeric(as.character(x)),na.rm=T),rnd)}),
      SD=apply(X[,varz],2,function(x){round(sd(as.numeric(as.character(x)),na.rm=T),rnd)}),
      Missing=round(apply(X[,varz],2,function(x){mean(is.na(as.numeric(as.character(x))),na.rm=T)})*100,0),
      N=nrow(X)
    )
  }
  
  if(meancount){sumtab$Mean[sumtab$Range%in%"(0,1)"]<-apply(X[,varz[sumtab$Range%in%"(0,1)"]],2,function(x){round(sum(as.numeric(as.character(x)),na.rm=T)/length(x),rnd)})}
  if(length(labz)){sumtab$Variable<-labz}
  sumtab$Missing <- paste0(sumtab$Missing,"%")
  return(sumtab)
}


#########################################
## Weighted mean for matching estimates
#########################################
att.wm <- function(att.mat){
  att.mat$TEST <- paste0(att.mat$Y,"_",att.mat$treat,"_",att.mat$Subgroup)
  testz <- as.character(unique(att.mat$TEST))
  t0 <- 2
  # weighted mean of ATTs
  att.list <- lapply(seq_along(testz),function(t0){#print(t0)
    att.0 <- att.mat[att.mat$TEST%in%testz[t0],]
    att.0$N_after <- as.numeric(as.character(att.0$N_after))
    att.0$N_before <- as.numeric(as.character(att.0$N_before))
    if(sum(!is.na(att.0$ATT))>0){
      att.0 <- att.0[!is.na(att.0$ATT),]
      # percent improvement vs. percent data loss (standardized distances)
      bw <- 1   # relative weight of balance improvement vs. data loss
      bi_w <- (att.0$Bal_before-att.0$Bal_after)/att.0$Bal_before
      if(sd(bi_w)>0){bi_w <- scale(bi_w)}
      dl_w <- (att.0$N_before-att.0$N_after)/att.0$N_before
      if(sd(dl_w)>0){dl_w <- scale(dl_w)}
      loc_match <- data.frame(pct_bi = bw*bi_w, pct_dl = dl_w)
      loc_match[is.na(loc_match$pct_bi),"pct_bi"] <- mean(loc_match[!is.na(loc_match$pct_bi),"pct_bi"])
      loc_match[is.na(loc_match$pct_dl),"pct_dl"] <- mean(loc_match[!is.na(loc_match$pct_dl),"pct_dl"])
      loc_ideal <- data.frame(pct_bi = max(loc_match$pct_bi,na.rm=T), pct_dl = min(loc_match$pct_dl,na.rm=T))
      dist_match <- gDistance(SpatialPoints(loc_match),SpatialPoints(loc_ideal),byid = T,hausdorff=TRUE)
      w_d <- 1/dist_match
      w_d[is.infinite(w_d)] <- 1
      att.w <- data.frame(
        Outcome = as.character(att.0$Y[1]),
        Treat = as.character(att.0$treat[1]),
        Subgroup = as.character(att.0$Subgroup[1]),
        ATT = weighted.mean(att.0$ATT,w=w_d),
        SE = weighted.mean(att.0$SE,w=w_d),
        CI_lower = weighted.mean(att.0$ATT,w=w_d)-1.96*weighted.mean(att.0$SE,w=w_d),
        CI_upper = weighted.mean(att.0$ATT,w=w_d)+1.96*weighted.mean(att.0$SE,w=w_d),
        N_before = mean(att.0$N_before),
        N_after = weighted.mean(att.0$N_after,w=w_d),
        Bal_before = mean(att.0$Bal_before),
        Bal_after = weighted.mean(att.0$Bal_after,w=w_d),
        Bal_pct = weighted.mean((att.0$Bal_before-att.0$Bal_after)/att.0$Bal_before,w=w_d)
      )
    }
    if(sum(!is.na(att.0$ATT))==0){print(paste0("Missing: ",testz[t0]))
      # percent improvement vs. percent data loss (standardized distances)
      att.w <- data.frame(
        Outcome = as.character(att.0$Y[1]),
        Treat = as.character(att.0$treat[1]),
        Subgroup = as.character(att.0$Subgroup[1]),
        ATT = NA,
        SE = NA,
        CI_lower = NA,
        CI_upper = NA,
        N_after = NA,
        Bal_after = NA,
        Bal_pct = NA,
        stringsAsFactors = FALSE
      )
    }
    att.w
  })
  att.mat0 <- do.call(rbind,att.list)
  return(att.mat0)
}